## Replication code for: Katsumata, Hiroto and Shunya Noda. 2023. 
## "'Kick Them Out' as a Voting Strategy: Theory and Evidence from Multi-member District Elections." 
## The Journal of Politics, forthcoming.

## Data wrangling for the analysis of the survey data from the 2004 General Election in Romania
## Author: Hiroto Katsumata
## Date: May, 2023

## Load packages
library(tidyverse)

## Initial setting
options(stringsAsFactors = FALSE)

## Load functions
source("functions/kickout_functions.R")


## Data wrangling ====================

## Read the raw survey file and the two district-level tables
cses2 <- read.csv("data/cses2.csv", header = TRUE)
rou2004seat <- read.csv("data/rou2004seat.csv", header = TRUE)
## The `name` column of the share table is dropped right after reading
rou2004share <- dplyr::select(
  read.csv("data/rou2004share.csv", header = TRUE),
  -name
)
## Total seats per party, keyed by party label (compared further below with
## the seats allocated in the first step)
totalseat <- c(partyA = 132, partyB = 113, partyD = 47, partyE = 22)

## Keep the rows whose B1004 equals "ROU_2004" ...
rou2 <- dplyr::filter(cses2, B1004 == "ROU_2004")
## ... and give the CSES variables used below readable names
rou2 <- rename(
  rou2,
  ## Respondent-level columns
  id = B1009,
  age = B2001,
  dist = B2031,
  vote0 = B3006_1,
  ## One like* column per party label A-F
  likeA = B3037_A,
  likeB = B3037_B,
  likeC = B3037_C,
  likeD = B3037_D,
  likeE = B3037_E,
  likeF = B3037_F,
  ## District-level columns
  magnitude = B4001,
  shareA = B4004_A,
  shareB = B4004_B,
  shareD = B4004_D,
  shareE = B4004_E
)

## Recode missing-value codes to NA
## The codes 96, 97, 98, 99, 997 and 999 are replaced by NA in every column
## except `id`. `id` is left untouched because it is the grouping key of the
## per-respondent computations further below (`group_by(id)`): blanking an id
## that happens to equal one of the codes would pool unrelated respondents
## into a single NA group.
## NOTE(review): all other columns (including age and dist) are still recoded,
## exactly as before -- confirm that none of them can legitimately take these
## values.
na_codes <- c(96, 97, 98, 99, 997, 999)
recode_cols <- setdiff(names(rou2), "id")
rou2[recode_cols] <- lapply(rou2[recode_cols], function(column) {
  column[column %in% na_codes] <- NA
  column
})

## Derive the covariates and the party-identification code, then discard
## every remaining column whose name starts with "B"
rou2 <- rou2 %>%
  mutate(
    gender = as.factor(B2002),
    ethnicity = as.factor(B2029),
    university = B2003 >= 8,
    ## Raw code: 99 when B3030 is 0; otherwise B3029_1, except that B3033 is
    ## used when B3030 is 5
    partyID = ifelse(B3030 == 0, 99, ifelse(B3030 != 5, B3029_1, B3033)),
    ## Collapse: {1, 8, 9} -> 1, {2, 6, 7} -> 2, 3 -> 3, 4 -> 4,
    ## any other value -> 99 (a missing raw code stays NA)
    partyID = ifelse(
      partyID %in% c(1, 8, 9), 1,
      ifelse(
        partyID %in% c(2, 6, 7), 2,
        ifelse(
          partyID == 3, 3,
          ifelse(partyID == 4, 4, 99)
        )
      )
    )
  ) %>%
  dplyr::select(-starts_with("B"))

## District data
## Average the four share variables within each district, append the rows of
## `rou2004share`, and attach the seat table by district. `mag` is the sum of
## the seats of the four parties.
rou2004seat <- rou2 %>%
  group_by(dist) %>%
  summarise(
    shareA = mean(shareA),
    shareB = mean(shareB),
    shareD = mean(shareD),
    shareE = mean(shareE)
  ) %>%
  ungroup() %>%
  as.data.frame() %>%
  rbind(rou2004share) %>%
  right_join(rou2004seat, by = "dist") %>%
  mutate(mag = seatA + seatB + seatD + seatE)
## The correct magnitude for Bucharest is 28, but this district is not used
## in the analysis

## Calculate seat allocation in the first step
## For every district row, Hare2() (loaded from the sourced functions file) is
## called with the district magnitude, the four party shares and the total
## vote. Its `seat`, `reminder` and `r_seat` components are stored row by row;
## the matrices have one column per party in the order A, B, D, E.
Hare2seat <- matrix(NA, nrow = nrow(rou2004seat), ncol = 4)
Hare2reminder <- matrix(NA, nrow = nrow(rou2004seat), ncol = 4)
Hare2r_seat <- numeric(nrow(rou2004seat))
## seq_len() yields an empty loop for a zero-row table, whereas 1:nrow()
## would iterate over c(1, 0)
for (i in seq_len(nrow(rou2004seat))) {
  res <- Hare2(
    magnitude = rou2004seat$mag[i],
    share = unlist(rou2004seat[i, c("shareA", "shareB", "shareD", "shareE")]),
    totalvote = rou2004seat$totalvote[i]
  )
  Hare2seat[i, ] <- res$seat
  Hare2reminder[i, ] <- res$reminder
  Hare2r_seat[i] <- res$r_seat
}

## Seats still to be allocated to each party after the first step
totalseat - colSums(Hare2seat)
## The same quantity divided by the remaining votes left after the first step
## (scaled by 1e8)
(totalseat - colSums(Hare2seat)) / colSums(Hare2reminder) * 1e8
## The values for Parties A, B, and D are close to one another,
## which means that uVpn / Spn is almost the same (due to the d'Hondt method).
## Voters should therefore expect Vpc to determine the seat allocation after
## the first step, as in the simple Hare method.

## Seat allocation by the simple Hare method
## The `mag` column is passed to PRseatROU2004() under the name `magnitude`
Harerou2 <- PRseatROU2004(rename(rou2004seat, magnitude = mag))

## List the districts where the exact method and the simple Hare method
## disagree for at least one of the four parties
rou2004seat %>%
  left_join(Harerou2, by = "dist") %>%
  filter(!(seatA == HseatA & seatB == HseatB &
             seatD == HseatD & seatE == HseatE))

## Merge datasets
## Attach the district-level tables to the respondents, restrict the sample,
## and build the respondent-level variables used in the analysis.
## NOTE(review): runup, lastwin, nocompA/B/D/E and n_comp are not created in
## this file; they presumably come from rou2004seat.csv or from
## PRseatROU2004() -- confirm.
rou2 <- rou2 %>%
				## Drop the survey share columns before joining the district table,
				## which carries its own shareA/B/D/E columns
				dplyr::select(-c(shareA, shareB, shareD, shareE)) %>%
				left_join(., rou2004seat, by = "dist") %>%
				left_join(., Harerou2, by = "dist") %>%
				## Exclude districts whose seat allocation under the simple Hare method
				## does not coincide with the actual one
				filter(seatA == HseatA) %>%
				filter(seatB == HseatB) %>%
				filter(seatD == HseatD) %>%
				filter(seatE == HseatE) %>%
				## Keep respondents with at least one non-missing like* value
				filter(is.na(likeA) + is.na(likeB) + is.na(likeC) + 
						   is.na(likeD) + is.na(likeE) + is.na(likeF) < 6) %>%
				## Per-respondent computations: like1 combines likeA and likeF,
				## like2 combines likeB and likeC.
				## NOTE(review): mean2() comes from functions/kickout_functions.R (not
				## visible here); presumably a mean with special handling of all-NA
				## input -- confirm.
				group_by(id) %>%
				mutate(like1 = mean2(c(likeA, likeF), na.rm = TRUE),
			   			 like2 = mean2(c(likeB, likeC), na.rm = TRUE)) %>%
				## Fourth-largest of the four values; sort() drops NAs, so this is NA
				## unless all four values are present
				mutate(fourthlike = sort(c(like1, like2, likeD, likeE), decreasing = TRUE)[4]) %>%
				## Value of the party voted for: vote0 codes 1-4 map to like1, like2,
				## likeD and likeE; any other code gives NA
				mutate(votelike = ifelse(vote0 == 1, like1,
													 ifelse(vote0 == 2, like2,
							  						ifelse(vote0 == 3, as.numeric(likeD),
							  						 ifelse(vote0 == 4, as.numeric(likeE), NA))))) %>%
				ungroup() %>%
				## Rows where the comparison is NA are dropped as well
				filter(votelike > fourthlike) %>% ## Exclude respondents who vote for their least preferred party
				## 1 if the party voted for is the one named in `runup`, 0 otherwise
				mutate(voterunup = ifelse(runup == "A" & vote0 == 1, 1, 
														ifelse(runup == "B" & vote0 == 2, 1, 
														 ifelse(runup == "D" & vote0 == 3, 1, 
							 	 							ifelse(runup == "E" & vote0 == 4, 1, 0))))) %>%
				## Value of the party named in `runup`
				mutate(likerunup = ifelse(runup == "A", like1, 
													  ifelse(runup == "B", like2, 
												     ifelse(runup == "D", likeD, 
									 					  ifelse(runup == "E", likeE, NA))))) %>%
				## TRUE if the party voted for holds at least one seat in the district
				mutate(votewin = ifelse(vote0 == 1, seatA > 0, 
													ifelse(vote0 == 2, seatB > 0,
													 ifelse(vote0 == 3, seatD > 0,
													 	ifelse(vote0 == 4, seatE > 0, NA))))) %>%
				## Values of the parties that hold a seat and are not named in `runup`
				## (NA otherwise)
				mutate(like1win = ifelse(seatA > 0 & runup != "A", like1, NA),
						   like2win = ifelse(seatB > 0 & runup != "B", like2, NA),
			  			 likeDwin = ifelse(seatD > 0 & runup != "D", likeD, NA),
			   			 likeEwin = ifelse(seatE > 0 & runup != "E", likeE, NA)) %>%
				## Values of the parties whose nocomp* entry is non-zero (NA otherwise)
				mutate(like1comp = ifelse(nocompA != 0, like1, NA),
						   like2comp = ifelse(nocompB != 0, like2, NA),
			  			 likeDcomp = ifelse(nocompD != 0, likeD, NA),
			   			 likeEcomp = ifelse(nocompE != 0, likeE, NA)) %>%
				## Per-respondent ranking of the *comp values in decreasing order;
				## positions beyond the number of non-missing values are NA
				group_by(id) %>%
				mutate(likecompfirst = sort(c(like1comp, like2comp, likeDcomp, likeEcomp), 
																		decreasing = TRUE)[1],
							 likecompsecond = sort(c(like1comp, like2comp, likeDcomp, likeEcomp), 
																		 decreasing = TRUE)[2],
							 likecompthird = sort(c(like1comp, like2comp, likeDcomp, likeEcomp), 
																		decreasing = TRUE)[3],
							 likecompfourth = sort(c(like1comp, like2comp, likeDcomp, likeEcomp), 
																		 decreasing = TRUE)[4]) %>%
				## Largest and second-largest of the *win values
				mutate(likenotrunupwinfirst = sort(c(like1win, like2win, likeDwin, likeEwin), 
																				 decreasing = TRUE)[1]) %>%
				mutate(likenotrunupwinsecond = sort(c(like1win, like2win, likeDwin, likeEwin), 
																				 decreasing = TRUE)[2]) %>%
				## NOTE(review): min2() comes from functions/kickout_functions.R (not
				## visible here); presumably a minimum with special handling of
				## all-NA input -- confirm.
				mutate(likenotrunupwinleast = min2(c(like1win, like2win, likeDwin, likeEwin), na.rm = TRUE)) %>%
				ungroup() %>%
				## 1 if the party voted for is the one named in `lastwin`, 0 otherwise
				mutate(votelastwin = ifelse(lastwin == "A" & vote0 == 1, 1, 
															ifelse(lastwin == "B" & vote0 == 2, 1, 
														   ifelse(lastwin == "D" & vote0 == 3, 1, 
							 	 								ifelse(lastwin == "E" & vote0 == 4, 1, 0))))) %>%
				## Logical indicators: does the value of the party voted for equal the
				## respective ranked value? likecompleast is the fourth-ranked *comp
				## value when n_comp is 4 and the third-ranked one otherwise.
				mutate(votecompfirst = (votelike == likecompfirst),
							 votecompsecond = (votelike == likecompsecond),
							 votenotrunupwinfirst = (votelike == likenotrunupwinfirst),
							 votenotrunupwinsecond = (votelike == likenotrunupwinsecond),
							 likecompleast = ifelse(n_comp == 4, likecompfourth, likecompthird))

## Turn NaN entries of like2win into NA
rou2$like2win <- replace(rou2$like2win, is.nan(rou2$like2win), NA)

## Check district magnitude
## Tabulate the per-district mean of `magnitude`
rou2 %>%
  group_by(dist) %>%
  summarise(m = mean(magnitude)) %>%
  ungroup() %>%
  pull(m) %>%
  table(m = .)

## Keep only the variables used in the main analysis, in this column order
rou2 <- rou2[c(
  "age", "dist", "magnitude", "gender", "ethnicity", "university",
  "partyID", "n_comp",
  "voterunup", "likerunup", "votewin",
  "likecompfirst", "likecompsecond", "likecompthird", "likecompfourth",
  "likenotrunupwinfirst", "likenotrunupwinsecond", "likenotrunupwinleast",
  "votelastwin", "votecompfirst", "votecompsecond",
  "votenotrunupwinfirst", "votenotrunupwinsecond",
  "likecompleast"
)]

## Write the analysis dataset to "data/rou2.csv" (no row names, UTF-8)
write.csv(rou2, file = "data/rou2.csv", row.names = FALSE,
          fileEncoding = "UTF-8")
